#### setting environment ####
require(quanteda)
require(stm)
require(runjags)
require(coda)
require(RColorBrewer)

# set color palette to draw Figure 2(b)
blue.colors <- colorRamp(brewer.pal(9, "PuBu")[c(1, 6)], space = "rgb")

newspaper.names <- c("Asahi", "Chugoku", "Chunichi", "Hokkaido", "Kahoku", 
                     "Mainichi", "Nikkei", "Nishinippon", "Sankei", "Yomiuri")

#### data preparation ####
# estimation results of the 65-topic model
load("STM_result_65.Rdata")
# estimation results of the Wordfish models
load("Study2_Wordfish_result.Rdata")
# estimation results of the factor analysis models
load("FA_result.Rdata")

#### newspapers' ideal points covering various issues (Figure 2) ####
# posterior draws of ideal points (multiplied by 10 to adjust the scale to other works, see Online Appendix F.1)
Study2.ideal.points.draws <- t(rbind(FA.result$mcmc[[1]][, 1:10], 
                                     FA.result$mcmc[[2]][, 1:10], 
                                     FA.result$mcmc[[3]][, 1:10])) * 10
rownames(Study2.ideal.points.draws) <- newspaper.names

## summary statistics
# point estimates (posterior means)
Study2.ideal.points.mean <- rowMeans(Study2.ideal.points.draws)
# 95% credible intervals (highest posterior density intervals)
Study2.ideal.points.CI <- HPDinterval(as.mcmc(t(Study2.ideal.points.draws)))
# descending order of the point estimates
Study2.ideal.points.order <- order(Study2.ideal.points.mean, decreasing = TRUE)

## posterior probability that newspaper i's ideal point exceeds the ideal point of newspaper j
prob.more.rightist <- matrix(NA, 10, 10)
rownames(prob.more.rightist) <- colnames(prob.more.rightist) <- newspaper.names
for (i in 1:10) {
  for (j in 1:10) {
    prob.more.rightist[i, j] <- mean(Study2.ideal.points.draws[i, ] - 
                                       Study2.ideal.points.draws[j, ] > 0)
  }
}
# sort matrix in the ascending order of the point estimates
prob.more.rightist.ordered <- t(apply(prob.more.rightist[Study2.ideal.points.order, Study2.ideal.points.order], 1, rev))
round(prob.more.rightist.ordered, 2)

## draw the figure
cairo_pdf("Figure_2.pdf",
          width = 10, height = 6, pointsize = 11, family = "Helvetica")
layout(matrix(c(1, 2), 1, 2), widths = c(0.9, 1.1))
par(mar = c(5, 1, 5, 1))
dotchart(Study2.ideal.points.mean[Study2.ideal.points.order], pch = 19, xlim = c(-1.1, 1.6), 
         xlab = "Estimated Ideal Points")
segments(Study2.ideal.points.CI[Study2.ideal.points.order, 1], 1:10, 
         Study2.ideal.points.CI[Study2.ideal.points.order, 2], 1:10)
title("(a) Estimation results of the ideal points of\nJapanese newspapers")
par(mar = c(6, 1, 6, 2))
plot(NULL, NULL, type = "n", xlim = c(-4, 10 + 1.5), ylim = c(-0.5, 10 + 0.5),
     xlab = "", ylab = "", axes = FALSE)
for (i in 1:10) {
  for (j in 1:10) {
    if (j >= 10 + 1 - i) break
    polygon(c(j - 1 + 0.05, j - 1 + 0.05, j - 0.05, j - 0.05), c(i - 1 + 0.05, i - 0.05, i - 0.05, i - 1 + 0.05),
            border = NA, col = rgb(blue.colors(prob.more.rightist.ordered[i, j] ^ 8) / 255))
  }
}
for (i in 1:100) {
  polygon(c(10 + 0.5, 10 + 0.5, 10 + 1.5, 10 + 1.5), 
          (c(i - 1, i, i, i - 1) * 0.01) * 10,
          border = rgb(blue.colors((0.5 + i * 0.005) ^ 8) / 255),
          col = rgb(blue.colors((0.5 + i * 0.005) ^ 8) / 255))
}
axis(4, at = seq(0, 10, 10 / 10), 
     labels = rep("", 11), line = -0.75, lwd = 0.5)
mtext(sprintf("%2.2f", seq(0.5, 1, 0.05)), side = 4, at = seq(0, 10, 10 / 10), 
      cex = 0.8, line = 0, las = 1)
text(rep(-4, 10), 1:10 - 0.5, 
     labels = paste0(10:1, ". ", newspaper.names[Study2.ideal.points.order]), pos = 4)
text(1:10 - 0.5, rep(0, 10), 
     labels = 1:10, pos = 1)
title("(b) Comparison between newspapers' ideal points", line = 3.8)
dev.off()

#### topics and terms related and unrelated to the left-right dimension (Tables 2 and 3) ####
# posterior draws of factor loadings (divided by 10 to adjust the scale to other works, see Online Appendix F.1)
factor.loadings.draws <- t(rbind(FA.result$mcmc[[1]][, 11:75], 
                                 FA.result$mcmc[[2]][, 11:75], 
                                 FA.result$mcmc[[3]][, 11:75])) / 10

## summary statistics
# point estimates (posterior means)
factor.loadings.mean <- rowMeans(factor.loadings.draws)
# descending order of the point estimates
factor.loadings.order <- order(abs(factor.loadings.mean), decreasing = TRUE)

## topics whose absolute value of the factor loading is in the top five
# top five frequent and exclusive terms
frex.terms.related <- labelTopics(STM.result, n = 5)$frex[factor.loadings.order[1:5], ]
print(frex.terms.related, quote = FALSE)
# factor loadings
round(factor.loadings.mean[factor.loadings.order[1:5]], 2)

# terms frequently used by left and right newspapers
negative.terms.related <- positive.terms.related <- matrix(NA, 5, 5)
for (i in 1:5) {
  # Wordfish result of the i-th most related topic
  Wordfish.result <- Study2.wordfish.result[[factor.loadings.order[i]]]
  # threshold of psi to evaluate whether each term is frequently used in general
  psi.threshold <- quantile(Wordfish.result$psi, 0.75)
  # ascending order of term's coefficients
  term.coef.ranking <- order(Wordfish.result$beta[Wordfish.result$psi > psi.threshold])
  # top five terms.related frequently used by left newspapers
  negative.terms.related[i, ] <- (Wordfish.result$features[Wordfish.result$psi > psi.threshold])[head(term.coef.ranking, 5)]
  # top five terms.related frequently used by right newspapers
  positive.terms.related[i, ] <- (Wordfish.result$features[Wordfish.result$psi > psi.threshold])[rev(tail(term.coef.ranking, 5))]
}
print(negative.terms.related, quote = FALSE)
print(positive.terms.related, quote = FALSE)

## topics whose absolute value of the factor loading is in the bottom five
# top five frequent and exclusive terms
frex.terms.unrelated <- labelTopics(STM.result, n = 5)$frex[factor.loadings.order[65:61], ]
print(frex.terms.unrelated, quote = FALSE)
# factor loadings
round(factor.loadings.mean[factor.loadings.order[65:61]], 2)

#### list of all topics (Table A.1) ####
topics <- c("Constitutional revision", "The 2017 general election", "North Korea", "Nuclear power plants", 
            "U.S.-Iran relationship", "Official document", "Diet deliberations", "Marine Corps Air Station Futenma", 
            "Prime minister's scandals", "National security", "Ministry of Finance", "Japan-Korea relationship", 
            "Local government", "International nuclear disarmament", "Criminal trial", "Tax and pension", 
            "China's diplomatic policy", "Trade", "Labor law", "Civil Code reform", "Broadcasting", "Syria", 
            "Integrated Resort Bill", "Employment", "Freedom of speech", "Financial policy", 
            "Israeli-Palestinian conflict", "Entrance examination reform", "National finance", 
            "Corporate governance", "Antismoking bill", "Electoral systems", "Bank", "Olympic Games", 
            "Nuclear fuel", "Nursery school", "Medicine", "Sumo", "Manufacturing scandals", 
            "Agriculture, forestry, and fisheries", "Cryptocurrency", "Child welfare", "Education", "History", 
            "State budget", "Industrial relations", "Chinese politics", "Electric power", "Culture", "MEXT", 
            "Immigration", "Harassment in sports", "Enormous rain disaster", "European politics", 
            "Employment of the disabled", "Science", "Japan-Russia relationship", "Crime", "Natural disaster", 
            "Regional revitalization", "Administrative lawsuit", "Imperial Household", "Computer game", 
            "Sports", "Traffic")

data.frame(topics,  # topic labels
           labelTopics(STM.result, n = 5)$frex[factor.loadings.order, ],  # frequent and exclusive terms
           proportion = round(colMeans(STM.result$theta)[factor.loadings.order] * 100, 2),  # topic proportions
           loadings = round(factor.loadings.mean[factor.loadings.order], 2))  # point estimates of factor loadings

#### illustration of topic-specific positions (Figure A.3 and A.4) ####
## top ten topics most related to the left-right dimension
topic.labels.1 <- topics[1:10]

# topic IDs
illustrated.topics.1 <- factor.loadings.order[1:10]

# record topic-specific positions and their 95% confidence intervals
illustrated.topic.specific.positions.1 <- array(NA, c(10, 10, 3))
for (i in 1:10) {
  Wordfish.result <- Study2.wordfish.result[[illustrated.topics.1[i]]]
  illustrated.topic.specific.positions.1[i, , 1] <- Wordfish.result$theta
  illustrated.topic.specific.positions.1[i, , 2] <- Wordfish.result$theta - Wordfish.result$se.theta * 1.96
  illustrated.topic.specific.positions.1[i, , 3] <- Wordfish.result$theta + Wordfish.result$se.theta * 1.96
}

# draw the figure
cairo_pdf("Figure_A3.pdf",
          width = 8, height = 4.6, pointsize = 6, family = "Meiryo")
par(mar = c(1.5, 0, 1.5, 0))
layout(matrix(c(1, 2), 2, 1))
plot(NULL, NULL, type = "n", xlim = c(-2, 34), ylim = c(0.5, 10 + 0.5),
     xlab = "", ylab = "", axes = FALSE)
segments(rep(0, 10), 1:10, 
         rep(34, 10), 1:10, 
         lty = 3, lwd = 0.5, col = "gray")
text(rep(-3, 10), 1:10, newspaper.names[Study2.ideal.points.order], pos = 4)
for (i in 1:5) {
  points(3 + 7 * (i - 1) + illustrated.topic.specific.positions.1[i, Study2.ideal.points.order, 1], 
         1:10, pch = 20)
  segments(3 + 7 * (i - 1) + illustrated.topic.specific.positions.1[i, Study2.ideal.points.order, 2], 1:10, 
           3 + 7 * (i - 1) + illustrated.topic.specific.positions.1[i, Study2.ideal.points.order, 3], 1:10, lwd = 0.5)
  axis(1, at = c(7 * (i - 1), 3 + 7 * (i - 1), 6 + 7 * (i - 1)), labels = rep("", 3), lwd = 0.5)
  mtext(c("-3", "0", "3"), side = 1, at = c(7 * (i - 1), 3 + 7 * (i - 1), 6 + 7 * (i - 1)),
        line = 0.5, cex = 0.8)
}
mtext(topic.labels.1[1:5], side = 3, at = seq(3, 31, 7))
plot(NULL, NULL, type = "n", xlim = c(-2, 34), ylim = c(0.5, 10 + 0.5),
     xlab = "", ylab = "", axes = FALSE)
segments(rep(0, 10), 1:10, 
         rep(34, 10), 1:10, 
         lty = 3, lwd = 0.5, col = "gray")
text(rep(-3, 10), 1:10, newspaper.names[Study2.ideal.points.order], pos = 4)
for (i in 1:5) {
  points(3 + 7 * (i - 1) + illustrated.topic.specific.positions.1[i + 5, Study2.ideal.points.order, 1], 
         1:10, pch = 20)
  segments(3 + 7 * (i - 1) + illustrated.topic.specific.positions.1[i + 5, Study2.ideal.points.order, 2], 1:10, 
           3 + 7 * (i - 1) + illustrated.topic.specific.positions.1[i + 5, Study2.ideal.points.order, 3], 1:10, lwd = 0.5)
  axis(1, at = c(7 * (i - 1), 3 + 7 * (i - 1), 6 + 7 * (i - 1)), labels = rep("", 3), lwd = 0.5)
  mtext(c("-3", "0", "3"), side = 1, at = c(7 * (i - 1), 3 + 7 * (i - 1), 6 + 7 * (i - 1)),
        line = 0.5, cex = 0.8)
}
mtext(topic.labels.1[6:10], side = 3, at = seq(3, 31, 7))
dev.off()

## top ten topics least related to the left-right dimension
topic.labels.2 <- topics[65:56]

# topic IDs
illustrated.topics.2 <- factor.loadings.order[65:56]

# record topic-specific positions and their 95% confidence intervals
illustrated.topic.specific.positions.2 <- array(NA, c(10, 10, 3))
for (i in 1:10) {
  Wordfish.result <- Study2.wordfish.result[[illustrated.topics.2[i]]]
  illustrated.topic.specific.positions.2[i, , 1] <- Wordfish.result$theta
  illustrated.topic.specific.positions.2[i, , 2] <- Wordfish.result$theta - Wordfish.result$se.theta * 1.96
  illustrated.topic.specific.positions.2[i, , 3] <- Wordfish.result$theta + Wordfish.result$se.theta * 1.96
}

# draw the figure
cairo_pdf("Figure_A4.pdf",
          width = 8, height = 4.6, pointsize = 6, family = "Meiryo")
par(mar = c(1.5, 0, 1.5, 0))
layout(matrix(c(1, 2), 2, 1))
plot(NULL, NULL, type = "n", xlim = c(-2, 34), ylim = c(0.5, 10 + 0.5),
     xlab = "", ylab = "", axes = FALSE)
segments(rep(0, 10), 1:10, 
         rep(34, 10), 1:10, 
         lty = 3, lwd = 0.5, col = "gray")
text(rep(-3, 10), 1:10, newspaper.names[Study2.ideal.points.order], pos = 4)
for (i in 1:5) {
  points(3 + 7 * (i - 1) + illustrated.topic.specific.positions.2[i, Study2.ideal.points.order, 1], 
         1:10, pch = 20)
  segments(3 + 7 * (i - 1) + illustrated.topic.specific.positions.2[i, Study2.ideal.points.order, 2], 1:10, 
           3 + 7 * (i - 1) + illustrated.topic.specific.positions.2[i, Study2.ideal.points.order, 3], 1:10, lwd = 0.5)
  axis(1, at = c(7 * (i - 1), 3 + 7 * (i - 1), 6 + 7 * (i - 1)), labels = rep("", 3), lwd = 0.5)
  mtext(c("-3", "0", "3"), side = 1, at = c(7 * (i - 1), 3 + 7 * (i - 1), 6 + 7 * (i - 1)),
        line = 0.5, cex = 0.8)
}
mtext(topic.labels.2[1:5], side = 3, at = seq(3, 31, 7))
plot(NULL, NULL, type = "n", xlim = c(-2, 34), ylim = c(0.5, 10 + 0.5),
     xlab = "", ylab = "", axes = FALSE)
segments(rep(0, 10), 1:10, 
         rep(34, 10), 1:10, 
         lty = 3, lwd = 0.5, col = "gray")
text(rep(-3, 10), 1:10, newspaper.names[Study2.ideal.points.order], pos = 4)
for (i in 1:5) {
  points(3 + 7 * (i - 1) + illustrated.topic.specific.positions.2[i + 5, Study2.ideal.points.order, 1], 
         1:10, pch = 20)
  segments(3 + 7 * (i - 1) + illustrated.topic.specific.positions.2[i + 5, Study2.ideal.points.order, 2], 1:10, 
           3 + 7 * (i - 1) + illustrated.topic.specific.positions.2[i + 5, Study2.ideal.points.order, 3], 1:10, lwd = 0.5)
  axis(1, at = c(7 * (i - 1), 3 + 7 * (i - 1), 6 + 7 * (i - 1)), labels = rep("", 3), lwd = 0.5)
  mtext(c("-3", "0", "3"), side = 1, at = c(7 * (i - 1), 3 + 7 * (i - 1), 6 + 7 * (i - 1)),
        line = 0.5, cex = 0.8)
}
mtext(topic.labels.2[6:10], side = 3, at = seq(3, 31, 7))
dev.off()